* Session setup: reset memory, disable output paging and raise the variable limit
* before loading the analysis file.
clear all
set more off
set maxvar 20000

*Define directory with input data
cd "Q:"

*Load merged data
use se_analysis_data_cleaned_1996_2018, clear

*Set survey weights
* secu is declared as the sampling unit, stratum as the strata and
* sswgts_ev_scaled as the probability weight; strata with a single sampling
* unit are handled with the "scaled" method.
svyset secu [pw=sswgts_ev_scaled], strata(stratum) singleunit(scaled)

*Identify respondents reporting SE in both sources
* seconsistent is 1 when both indicators equal 1 and missing otherwise.
gen seconsistent=1 if any_hrs_se==1 & any_ssa_se_lcy==1
* Ratio of seincmiss_noimp to tot_se_earn_lcy_thou_real (missing whenever the
* denominator is zero or missing).
gen missingshare=seincmiss_noimp/tot_se_earn_lcy_thou_real

*Intensive margin criteria
* intcrit is 1 for observations that satisfy every condition below and missing
* otherwise. !missing() is used instead of "!=." because extended missing
* values (.a-.z) are different from "." and sort above every number, so
* "x>0 & x!=." would let them through as if they were positive values.
gen intcrit=1 if noimpute==1 & insamp==1 & inrange(waveyr,2004,2016) ///
	& wtcrnh>0 & !missing(wtcrnh) ///
	& (merge_ser_lcy==3 | merge_der_lcy==3) ///
	& !missing(sswgts_ev_scaled) ///
	& ((totinc_ssa>0 & !missing(totinc_ssa)) | (totinc_hrs_noimp>0 & !missing(totinc_hrs_noimp)))

*Kernel Densities

*Figure 1 - SE
* Four overlaid weighted kernel densities of self-employment income, each
* restricted to intcrit==1 and to values strictly between 0 and 100:
*   1) se_inc2_thou_real_noimp   where se_ssa_hrs_lcy==1
*   2) tot_se_earn_lcy_thou_real where se_ssa_hrs_lcy==1
*   3) se_inc2_thou_real_noimp   where se_hrs_only_lcy==1
*   4) tot_se_earn_lcy_thou_real where se_ssa_only_lcy==1
* The pieces shared by all four curves are kept in local macros so the
* bandwidth, bounds and weights are stated once.
local f1se_hrs  "se_inc2_thou_real_noimp"
local f1se_ssa  "tot_se_earn_lcy_thou_real"
local f1se_wt   "[aw=sswgts_ev_scaled]"
local f1se_opts "ll(0) ul(100) k(e) bw(2.6815)"
local f1se_hrs_if "intcrit==1 & `f1se_hrs'>0 & `f1se_hrs'<100"
local f1se_ssa_if "intcrit==1 & `f1se_ssa'>0 & `f1se_ssa'<100"

kdens `f1se_hrs' `f1se_wt' if `f1se_hrs_if' & se_ssa_hrs_lcy==1, `f1se_opts' lpattern(longdash) ///
	addplot( ///
		kdens `f1se_ssa' `f1se_wt' if `f1se_ssa_if' & se_ssa_hrs_lcy==1, `f1se_opts' lpattern(dash) ///
		|| kdens `f1se_hrs' `f1se_wt' if `f1se_hrs_if' & se_hrs_only_lcy==1, `f1se_opts' lpattern(solid) ///
		|| kdens `f1se_ssa' `f1se_wt' if `f1se_ssa_if' & se_ssa_only_lcy==1, `f1se_opts' lpattern(shortdash) ///
	) ///
	xtitle("Self-Employment Income (000's)") ///
	legend(rows(2) label(1 "In both sources - HRS") label(2 "In both sources - SER/DER") label(3 "HRS Only") label(4 "SER/DER Only")) ///
	title("")

* Save the graph currently in memory as a PNG, overwriting any previous copy.
graph export "Q:\U\jabramow\CenHRS\RSF Proposal\HRS Data and Code\Longitudinal\fig1_se_inc_dists_by_type.png", replace

*Figure 1 - Wage
* Four overlaid weighted kernel densities of wage and salary income, each
* restricted to intcrit==1 and to values strictly between 0 and 100:
*   1) wage_inc2_thou_real_noimp where e_ssa_hrs_lcy==1
*   2) non_se_earn_lcy_thou_real where e_ssa_hrs_lcy==1
*   3) wage_inc2_thou_real_noimp where e_hrs_only_lcy==1
*   4) non_se_earn_lcy_thou_real where e_ssa_only_lcy==1
* Shared weights, bounds, kernel and bandwidth are held in local macros.
local f1w_hrs  "wage_inc2_thou_real_noimp"
local f1w_ssa  "non_se_earn_lcy_thou_real"
local f1w_wt   "[aw=sswgts_ev_scaled]"
local f1w_opts "ll(0) ul(100) k(e) bw(1.8309)"
local f1w_hrs_if "intcrit==1 & `f1w_hrs'>0 & `f1w_hrs'<100"
local f1w_ssa_if "intcrit==1 & `f1w_ssa'>0 & `f1w_ssa'<100"

kdens `f1w_hrs' `f1w_wt' if `f1w_hrs_if' & e_ssa_hrs_lcy==1, `f1w_opts' lpattern(longdash) ///
	addplot( ///
		kdens `f1w_ssa' `f1w_wt' if `f1w_ssa_if' & e_ssa_hrs_lcy==1, `f1w_opts' lpattern(dash) ///
		|| kdens `f1w_hrs' `f1w_wt' if `f1w_hrs_if' & e_hrs_only_lcy==1, `f1w_opts' lpattern(solid) ///
		|| kdens `f1w_ssa' `f1w_wt' if `f1w_ssa_if' & e_ssa_only_lcy==1, `f1w_opts' lpattern(shortdash) ///
	) ///
	xtitle("Wage and Salary Employment Income (000's)") ///
	legend(rows(2) label(1 "In both sources - HRS") label(2 "In both sources - SER/DER") label(3 "HRS Only") label(4 "SER/DER Only")) ///
	title("")

* Save the graph currently in memory as a PNG, overwriting any previous copy.
graph export "Q:\U\jabramow\CenHRS\RSF Proposal\HRS Data and Code\Longitudinal\fig1_wage_inc_dists_by_type.png", replace

*Figure 2 and Appendix 3 Figure
* Writes survey-weighted means to the workbook fig2_and_appendix3, one row per
* estimation: column A holds the variable name, the estimates e(b) start in
* column B, and the per-group observation counts e(_N) start in column V.
* Row order: fig2vars on the full sample, then for each intcat value 1..4 the
* fig2vars restricted to that intcat followed by the app3vars rows.
local fig2vars "seincmiss_noimp wsincmiss_noimp totincmiss_noimp"
local app3vars "seincmiss_noimp missingshare"

* Sample filter shared by every estimation in this section. !missing() is used
* instead of "!=." so that extended missing values (.a-.z), which sort above
* every number and differ from ".", are not accepted as positive values.
local fig2_samp "noimpute==1 & ssatopcoded_lcy==0 & insamp==1 & inrange(waveyr,2004,2016) & wtcrnh>0 & !missing(wtcrnh) & (merge_ser_lcy==3 | merge_der_lcy==3) & !missing(sswgts_ev_scaled) & ((totinc_ssa>0 & !missing(totinc_ssa)) | (totinc_hrs_noimp>0 & !missing(totinc_hrs_noimp)))"

putexcel set "Q:\U\jabramow\CenHRS\RSF Proposal\HRS Data and Code\Longitudinal\fig2_and_appendix3", replace
* Header row: 20 estimate columns (B-U) followed by the N column (V).
* NOTE(review): this layout assumes each over() variable yields exactly 20
* non-empty groups; with fewer groups the estimates shift left - confirm.
putexcel B2=("Mean 1") C2=("Mean 2") D2=("Mean 3") E2=("Mean 4") F2=("Mean 5") G2=("Mean 6") H2=("Mean 7") I2=("Mean 8") ///
		 J2=("Mean 9") K2=("Mean 10") L2=("Mean 11") M2=("Mean 12") N2=("Mean 13") O2=("Mean 14") P2=("Mean 15") Q2=("Mean 16") ///
		 R2=("Mean 17") S2=("Mean 18") T2=("Mean 19") U2=("Mean 20") V2=("N")
* i is the next worksheet row to be written.
local i = 3

* Full-sample means by percentile_20.
foreach v in `fig2vars' {
	svy: mean `v' if `fig2_samp', over(percentile_20)
	putexcel A`i'=("`v'") B`i'=matrix(e(b)) V`i'=matrix(e(_N))
	local i = `i'+1
}

forvalues j=1/4 {
	* Means by percentile_20 within intcat==j.
	foreach v in `fig2vars' {
		svy: mean `v' if `fig2_samp' & intcat==`j', over(percentile_20)
		putexcel A`i'=("`v'") B`i'=matrix(e(b)) V`i'=matrix(e(_N))
		local i = `i'+1
	}

	* Means by percentile_se_20 among seconsistent==1.
	* NOTE(review): this loop does not depend on j, so the same rows are
	* written once per pass of the outer loop (four times in total). It is
	* left inside the outer loop so the existing row layout of the workbook
	* is unchanged - confirm whether the repetition is intended.
	foreach v in `app3vars' {
		svy: mean `v' if `fig2_samp' & seconsistent==1, over(percentile_se_20)
		putexcel A`i'=("`v'") B`i'=matrix(e(b)) V`i'=matrix(e(_N))
		local i = `i'+1
	}
}

*Appendix 3 discussion 

*Net profits
* Set se_profit_400 to 0 when profit is 0, or when se_profit is 1 and
* se_profit_400 is anything other than 1 (this includes missing values).
replace se_profit_400=0 if profit==0 | (se_profit==1 & se_profit_400!=1)
* Unweighted one-way tabulation for observations with any_hrs_se==1 and
* any_ssa_se_lcy==0 in the 2004-2016 window. !missing() replaces "!=." so that
* extended missing values (.a-.z) of the weights are excluded as well.
tab se_profit_400 if any_hrs_se==1 & any_ssa_se_lcy==0 & insamp==1 & inrange(waveyr,2004,2016) ///
	& wtcrnh>0 & !missing(wtcrnh) & (merge_ser_lcy==3 | merge_der_lcy==3) & !missing(sswgts_ev_scaled)

*Appendix 3 Table

*SOI Gross Receipts Categories
* hrsseinccat bins se_inc2_thou_real_noimp into 12 categories:
*   1: below 2.5 (zero and negative values included)
*   k: at or above the (k-1)th cut point and below the next, for cut points
*      2.5 5 10 25 50 100 200 500 1000 2500 5000
*  12: 5000 and above
* Every missing value, including extended missings (.a-.z), stays missing;
* "x>=5000 & x!=." would have placed .a-.z in the top category.
gen hrsseinccat=1 if se_inc2_thou_real_noimp<2.5
local cat = 1
foreach cut of numlist 2.5 5 10 25 50 100 200 500 1000 2500 5000 {
	local ++cat
	* Later (higher) cut points overwrite earlier ones, so each value ends
	* up in the category of the largest cut point it reaches.
	replace hrsseinccat=`cat' if se_inc2_thou_real_noimp>=`cut' & !missing(se_inc2_thou_real_noimp)
}

local app3_hrs_int "se_inc2_thou_real_noimp_cond"
local app3_ssa_int "tot_se_earn_lcy_thou_real_cond"

putexcel set "Q:\U\jabramow\CenHRS\RSF Proposal\HRS Data and Code\Longitudinal\appendix3_grossvnet", replace
* Header row: 8 estimate columns (B-I) followed by N (J). The statistics
* written below are survey totals even though the labels read "Mean".
putexcel B2=("Mean 1") C2=("Mean 2") D2=("Mean 3") E2=("Mean 4") F2=("Mean 5") G2=("Mean 6") H2=("Mean 7") I2=("Mean 8") J2=("N")
* i is the next worksheet row to be written.
local i = 3

svyset secu [pw=sswgts_ev_scaled], strata(stratum) singleunit(scaled)

* Survey-weighted totals by hrsseinccat for wave year 2016, seconsistent==1.
* !missing() replaces "!=." so extended missing weights are excluded too.
foreach v in `app3_hrs_int' `app3_ssa_int' {
	svy: total `v' if seconsistent==1 & insamp==1 & waveyr==2016 & wtcrnh>0 & !missing(wtcrnh) ///
		& (merge_ser_lcy==3 | merge_der_lcy==3) & !missing(sswgts_ev_scaled) ///
		& noimpute==1 & ssatopcoded_lcy==0, over(hrsseinccat)
	* The layout reserves columns B-I for estimates. If more than 8 income
	* categories are populated, the counts written at J overwrite the 9th
	* and later estimates, so flag it loudly instead of failing silently.
	if colsof(e(b)) > 8 {
		display as error "note: `v' has more than 8 non-empty hrsseinccat groups; N written at J`i' overwrites totals from column J onward"
	}
	putexcel A`i'=("`v'") B`i'=matrix(e(b)) J`i'=matrix(e(_N))
	local i = `i'+1
}

*Table 6 - Compliance Rate
* Writes survey-weighted totals to the workbook table6, one row per
* estimation: A = variable name, B = estimate e(b), C = its variance
* (element [1,1] of e(V)), D = observation count e(_N). The four variables
* are written first for the full sample and then again for intcat==3.
* The statistics are totals even though the header labels read "Mean".
local table6_hrs_int "se_inc2_thou_real_noimp_cond wage_inc2_thou_real_noimp_cond"
local table6_ssa_int "tot_se_earn_lcy_thou_real_cond non_se_earn_lcy_thou_real_cond"

* Sample filter shared by both passes. !missing() is used instead of "!=."
* so that extended missing values (.a-.z) of the weights are excluded too.
local t6_samp "insamp==1 & inrange(waveyr,2004,2016) & wtcrnh>0 & !missing(wtcrnh) & (merge_ser_lcy==3 | merge_der_lcy==3) & !missing(sswgts_ev_scaled) & noimpute==1 & ssatopcoded_lcy==0"

putexcel set "Q:\U\jabramow\CenHRS\RSF Proposal\HRS Data and Code\Longitudinal\table6", replace
putexcel B2=("Mean 1") C2=("Var 1") D2=("N")
* i is the next worksheet row to be written.
local i = 3

svyset secu [pw=sswgts_ev_scaled], strata(stratum) singleunit(scaled)

* First pass: no extra restriction ("1" is always true); second pass:
* intcat==3 only. Row order matches running the two passes back to back.
* NOTE(review): the sample is restricted with "if" rather than svy's
* subpop() option; the point estimates are the same either way, but the
* variance written to column C can differ - confirm which is intended.
foreach restr in "1" "intcat==3" {
	foreach v in `table6_hrs_int' `table6_ssa_int' {
		svy: total `v' if `restr' & `t6_samp'
		matrix v=e(V)
		putexcel A`i'=("`v'") B`i'=matrix(e(b)) C`i'=v[1,1] D`i'=matrix(e(_N))
		local i = `i'+1
	}
}